/* 
 * Copyright (C) 2006, Dung-Bang Tsai
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * ( If you want to use this library commercially, feel free to
 *	  contact me: for a fee, I can sell you the code without the
 *		GPL license, so that you can use this code in your product
 *		without publishing your source code.  				)
 *
 * Authors:
 *	Tsai, Dung-Bang <dbtsai@gmail.com>
 *			
*			2006/03/08		at NCKU physics
 */
 
#ifndef  _TDBConvertEngine_H_
#define _TDBConvertEngine_H_
#include "wx/wx.h"
#include <deque>
#include <iostream>
#include <list>
#include <vector>

//using namespace std;
enum // You may define by using or | operator
{
	map=0x01,  						 // 是否查表直接對應
	correction = 0x02,     //字彙修正
	dest_exit = 0x04        // 目標編碼是否有這個字
};

enum
{
    Unix=1,
    Mac=2,
    DOS=3
};

class TDBstring 
{
public:

    std::vector<unsigned int> data; // 注意，最上面3bit拿來做flag, 定義如enum
 	size_t get_n_line(){return n_line;}
	size_t get_n_word(){return n_word;}
	size_t Len(){return data.size();}
	void push_back( unsigned int& ucs4);

    void Traditionalized();
    void Simplized();
    void StoT_mapping();
    void TtoS_mapping();
	int GetNewLineType(){return NewLineType;}
	
	unsigned int operator[](unsigned int i) const{return (data[i] & 0x000FFFFF);}
	
	
	TDBstring() : NewLineType(1), n_line(1) , n_word(0) , if_0D_flag(false){}
	
unsigned int display()
{
	std::vector<unsigned int>::iterator beg = data.begin();
	const std::vector<unsigned int>::iterator end = data.end();
//	TDBdetect_code_page fun;
	register unsigned int temp(0);
	unsigned char outbuf[5]={0,0,0,0,0};
//static size_t a_UCS4toUTF8(const unsigned int& src, unsigned char *outbuf);
	for( ;beg!= end ; beg++)
	{
		unsigned int src = *beg & 0x000FFFFF;
		//cout<<" "<<Number32toHex(src);
    if( src<0x80) 
	{ 
        outbuf[0]=src&0xFF;
        temp =  1;
    }        
    else if(src<0x800)
    {
        outbuf[0]= 0xC0 | (src>>6);
        outbuf[1]= 0x80 | (src&0x3F);
        temp = 2;
    }
    else if(src<0x10000)
    {
        outbuf[0]= 0xE0 | (src>>12);
        outbuf[1]= 0x80 | ((src>>6)&0x3F);
        outbuf[2]= 0x80 | (src&0x3F);
        temp =  3;
    }
    else if(src<0x110000)
    {
        outbuf[0]= 0xF0 | (src>>18);
        outbuf[1]= 0x80 | ((src>>12)&0x3F);
        outbuf[2]= 0x80 | ((src>>6)&0x3F);
        outbuf[3]= 0x80 | (src&0x3F);
       temp =  4;
    }
	outbuf[temp] = 0;
	std::cout << outbuf;
	}
}

private:
	// 1 = Unix(0x0A), 2 = Mac(0x0D), 3 = DOS(0x0D 0x0A)
	// 為最後一次的換行type, 而如果要raw data, 
	// 將紀錄在沒用的ucs4上最上面兩個bit, 也就是0x0A上最左邊兩個bit為
	// 01 = Unix(0x0A), 10 = Mac(0x0D), 11 = DOS(0x0D 0x0A)
	char NewLineType ; 

    //因為DOS的新行0x0D 0x0A,MAC 0x0D 所以遇到0D還還要看下一碼，
    //所以先turn on, 若下一碼為0A就更改new_line_type, 若不是的話就歸false
	bool if_0D_flag; 

    size_t n_line;
    size_t n_word;

};
#endif
